
********************************************************************************
//Grades in high school
// Adds one `vide` column per school year (t = 1, 2, 3, with t = 3 the year starting in year_vg3)
// to terminales.
cd $pathdata_processed
use terminales, clear

cd $pathdata_education
merge 1:m npid using tab_karakter_vgskole_2003-2018.dta, keep(1 3) nogen keepusing(skolear vide)

// First four characters of skolear give the calendar year used to position the record
gen year = real(substr(skolear, 1, 4))
gen t = year - year_vg3 + 3
duplicates drop
keep if inlist(t, 1, 2, 3)
drop skolear year
// One record per student and t: the first one after sorting on descending vide
gsort npid t -vide
bysort npid t: keep if _n == 1

reshape wide vide, i(npid) j(t)
cd $pathdata_processed
// `year` was already dropped above; the former `drop year` only ran because Stata resolved the
// abbreviation to year_vg3. Name the variable explicitly (the merge below brings it back).
drop year_vg3
merge 1:1 npid using terminales, keep(2 3) nogen

order npid year_vg3 komp_vg3 nus2000_vg3 lopenr_kurs_vg3 fylke_vg3 skolekom_vg3 public_vg3 skole_foretak_vg3 heldel_vg3 status_vg3 N_tilg_vg3 N_year_vg3 nb_komp ///
	  vide*1 vide*2 vide*3 
	
save terminales, replace
********************************************************************************
//Grades at the end of the 3rd year
// admission_score_vg3 = 10 x the mean of all valid stp/skr/mun grades (1-6), using one record
// per subject (fagkode) from school years starting no later than year_vg3.
cd $pathdata_processed
use terminales, clear

cd $pathdata_education
merge 1:m npid using tab_karakter_vgskole_2003-2018.dta, keep(master match) nogenerate keepusing(skolear fagkode stp skr mun)

generate year = real(substr(skolear, 1, 4))
keep if year <= year_vg3 & !missing(year)

foreach grade in stp skr mun {
	// The listed special codes are counted as a grade of 1
	replace `grade' = "1" if inlist(`grade', "0", "9", "IV", "IM", "ID", "IB", "IG", "X", "Z")
	generate `grade'_real = real(`grade')
	// Anything that is not an integer grade 1-6 is treated as missing
	replace `grade'_real = . if !inlist(`grade'_real, 1, 2, 3, 4, 5, 6)
}

// Per student and subject keep the first record after sorting on latest year, then descending
// skr, mun and stp
gsort npid fagkode -year -skr_real -mun_real -stp_real
bysort npid fagkode: keep if _n == 1

foreach grade in stp skr mun {
	bysort npid: egen sum_`grade' = total(`grade'_real)
	bysort npid: egen n_`grade' = count(`grade'_real)
}

order stp_real skr_real mun_real

generate admission_score_vg3 = ((sum_stp + sum_skr + sum_mun)/(n_stp + n_skr + n_mun))*10

keep npid admission_score_vg3
duplicates drop

cd $pathdata_processed
merge 1:1 npid using terminales, keep(using match) nogenerate

order npid year_vg3 komp_vg3 nus2000_vg3 lopenr_kurs_vg3 fylke_vg3 skolekom_vg3 public_vg3 skole_foretak_vg3 heldel_vg3 status_vg3 N_tilg_vg3 N_year_vg3 nb_komp ///
	  vide*1 vide*2 vide*3 admission_score_vg3

save terminales, replace
********************************************************************************
//Grades including repetition and private courses
// Same score as admission_score_vg3, but grade records are accepted up to year_vg3 + 9.
cd $pathdata_processed
use terminales, clear

cd $pathdata_education
merge 1:m npid using tab_karakter_vgskole_2003-2018.dta, keep(master match) nogenerate keepusing(skolear fagkode stp skr mun)

generate year = real(substr(skolear, 1, 4))
//+9 years after start of vg3 (=+8 years after the exams) is the last year of observation for the 5 cohorts
keep if year <= (year_vg3 + 9) & !missing(year)

foreach grade in stp skr mun {
	// The listed special codes are counted as a grade of 1
	replace `grade' = "1" if inlist(`grade', "0", "9", "IV", "IM", "ID", "IB", "IG", "X", "Z")
	generate `grade'_real = real(`grade')
	// Anything that is not an integer grade 1-6 is treated as missing
	replace `grade'_real = . if !inlist(`grade'_real, 1, 2, 3, 4, 5, 6)
}

// Per student and subject keep the first record after sorting on latest year, then descending
// skr, mun and stp
gsort npid fagkode -year -skr_real -mun_real -stp_real
bysort npid fagkode: keep if _n == 1

foreach grade in stp skr mun {
	bysort npid: egen sum_`grade' = total(`grade'_real)
	bysort npid: egen n_`grade' = count(`grade'_real)
}

order stp_real skr_real mun_real

generate admission_score = ((sum_stp + sum_skr + sum_mun)/(n_stp + n_skr + n_mun))*10

keep npid admission_score
duplicates drop

cd $pathdata_processed
merge 1:1 npid using terminales, keep(using match) nogenerate

order npid year_vg3 komp_vg3 nus2000_vg3 lopenr_kurs_vg3 fylke_vg3 skolekom_vg3 public_vg3 skole_foretak_vg3 heldel_vg3 status_vg3 N_tilg_vg3 N_year_vg3 nb_komp ///
	  vide*1 vide*2 vide*3 admission_score_vg3 admission_score

save terminales, replace
********************************************************************************
//Middle school GPA
// ms_gpa = largest grunn value found for the student in the course register
cd $pathdata_processed
use npid using terminales, clear

cd $pathdata_education
merge 1:m npid using f_kurs_1974-2018, keepusing(grunn) keep(master match) nogenerate

bysort npid: egen ms_gpa = max(grunn)
keep npid ms_gpa
duplicates drop

cd $pathdata_processed
merge 1:1 npid using terminales, keep(using match) nogenerate
// Values of 0 or above 66 are not kept as a GPA
replace ms_gpa = . if ms_gpa == 0 | ms_gpa > 66
save terminales, replace
********************************************************************************
//Gender, yob + ids of parents
cd $pathdata_processed
use terminales, clear

cd $pathdata_population
merge 1:m npid using slekt_1934-2019.dta, keep(master match) nogenerate keepusing(bmonth bsex nmpid nfpid)

// bmonth is read as digits: positions 1-4 give yob, positions 5-6 give mob
generate yob = real(substr(string(bmonth), 1, 4))
generate mob = real(substr(string(bmonth), 5, 2))

// female = 1 when bsex is 2, 0 for any other non-missing bsex, missing otherwise
generate female = (bsex == 2) if !missing(bsex)

keeporder npid yob mob female nmpid nfpid ms_gpa year_vg3 komp_vg3 nus2000_vg3 lopenr_kurs_vg3 fylke_vg3 skolekom_vg3 public_vg3 skole_foretak_vg3 heldel_vg3 status_vg3 N_tilg_vg3 N_year_vg3 nb_komp ///
	  vide*1 vide*2 vide*3 admission_score_vg3 admission_score

cd $pathdata_processed
save terminales, replace
********************************************************************************	 
//Short term completion
// completion = 1 when the student's vg3 course id (with its first character replaced by "P")
// is found in the f_demo register, 0 otherwise.
cd $pathdata_processed
use terminales, clear

// Full variable names are used here: the earlier `lopenr_kurs` / `lopenr_` spellings only
// worked through Stata's variable-abbreviation matching of lopenr_kurs_vg3.
keep npid lopenr_kurs_vg3 nus2000_vg3

replace lopenr_kurs_vg3 = "P" + substr(lopenr_kurs_vg3, 2, 9)
// The register is keyed on lopenr_kurs, so the key is renamed before merging
rename lopenr_kurs_vg3 lopenr_kurs

cd $pathdata_education
merge 1:m npid lopenr_kurs using f_demo_1974-2018.dta, keep(1 3) 
gen completion = _merge == 3
keep npid completion
duplicates drop

cd $pathdata_processed
merge 1:1 npid using terminales, keep(2 3) nogen
save terminales, replace
********************************************************************************
//Enrollments in HE
// Builds education_kurs: course records with hoved == 3 and complete institution, level and
// date information, dated 2018 or earlier.
cd $pathdata_education
use npid lopenr_kurs hoved hskode hskodedato nus2000 uhgruppe kltrinn2000 kltrinn2000dato skolekom using f_kurs_1974-2018.dta, clear

keep if hoved == 3
drop if missing(lopenr_kurs)
// hskode 999 is handled like a missing institution code
drop if hskode == 999 | missing(hskode)
drop if missing(hskodedato) | missing(kltrinn2000) | missing(kltrinn2000dato) | missing(uhgruppe)

drop hskodedato

// kltrinn2000dato is read as digits: positions 1-4 give the year, positions 5-6 the month
generate year = real(substr(string(kltrinn2000dato), 1, 4))
keep if year <= 2018
generate month = real(substr(string(kltrinn2000dato), 5, 2))

// semestre 1 = months 7-12, semestre 2 = months 1-6
generate semestre = cond(month >= 7, 1, 2) if !missing(month)

duplicates drop

// Levels of 13 or below are blanked, then filled with the lowest remaining level observed
// for the same nus2000 code; 14 is used when the code has no such level
replace kltrinn2000 = . if kltrinn2000 <= 13
bysort nus2000 (kltrinn2000): replace kltrinn2000 = kltrinn2000[1] if missing(kltrinn2000)
replace kltrinn2000 = 14 if missing(kltrinn2000)

cd $pathdata_processed
save education_kurs, replace
*****************************************
*panel with our sample
// Nine rows per student (year = year_vg3 + 1, ..., year_vg3 + 9), each matched to the
// enrollment records of that calendar year
cd $pathdata_processed
use npid year_vg3 using terminales, clear
expand 9
by npid, sort: generate year = year_vg3 + _n
keep npid year

merge 1:m npid year using education_kurs, keep(master match) nogenerate
save outcomes_education_e, replace
*****************************************
*outcomes for our sample: education enrollments
cd $pathdata_processed
use npid year_vg3 using terminales, clear
duplicates drop
merge 1:m npid using outcomes_education_e, keep(master match) nogenerate

//HE; first and last year in HE
// Row-level flag and year for records with hoved == 3
generate he_e = (hoved == 3)
generate year_e = year if hoved == 3

// Student-level summaries: ever in HE, first and last HE year
by npid, sort: egen he_e_ = max(he_e)
by npid, sort: egen first_year_e_ = min(year_e)
by npid, sort: egen last_year_e_ = max(year_e)

//Fields of study (QJE paper)
// Detailed field code (field_det) derived from the NUS2000 code prefix and the uhgruppe code.
// Later replace statements overwrite earlier ones, so the order of the lines matters.
tostring nus2000, gen(nus2000_s) force

g byte field_det = .
replace field_det = 0 if nus2000_s=="619902"
replace field_det = 1 if inlist(substr(nus2000_s,1,2),"65","75") & !inlist(substr(nus2000_s,1,3),"654","754") & !inlist(uhgruppe,"05","05H","11","11B","55","55M")  // natural sciences - delimit further?
replace field_det = 2 if inlist(substr(nus2000_s,1,3),"654","754") // information technology - delimit?
replace field_det = 3 if inlist(uhgruppe,"05","05H","11","11B") // engineer
replace field_det = 4 if inlist(substr(nus2000_s,1,2),"64","74") & !inlist(uhgruppe,"24","54","54M","75M") // economics and administration
replace field_det = 5 if inlist(uhgruppe,"24","54","54M","75M") // business graduate (sivilokonom), incl. master
replace field_det = 7 if inlist(substr(nus2000_s,1,4),"6372","7372")
replace field_det = 8 if inlist(substr(nus2000_s,1,3),"636","736")
replace field_det = 9 if inlist(substr(nus2000_s,1,4),"6352","7352")
replace field_det =30 if inlist(substr(nus2000_s,1,3),"635","735") & field_det!=9
replace field_det = 6 if inlist(substr(nus2000_s,1,2),"63","73") & !inlist(field_det,7,8,9,30)
replace field_det =10 if inlist(uhgruppe,"08","08B") // general teacher
replace field_det =11 if inlist(uhgruppe,"10","10B") // subject teacher
replace field_det =12 if inlist(uhgruppe,"09","09B") // preschool teacher
replace field_det =13 if inlist(substr(nus2000_s,1,3),"625") // continuing education
replace field_det =32 if inlist(substr(nus2000_s,1,4),"6238") // one-year teacher training (PPU) - not complete?
replace field_det =14 if inlist(substr(nus2000_s,1,2),"62","72") & !inlist(field_det,10,11,12,13,32)
replace field_det =15 if inlist(substr(nus2000_s,1,2),"61","71") & !inlist(substr(nus2000_s,1,3),"611","711") & nus2000_s!="619902"
replace field_det =16 if inlist(substr(nus2000_s,1,3),"611","711")
replace field_det =19 if inlist(nus2000_s,"662101","662102","662103","662107","662117","662118") // child welfare
replace field_det =20 if substr(nus2000_s,1,3)=="662" & field_det!=19 // social worker, etc.
replace field_det =21 if inlist(nus2000_s,"669901","669936","669945") // audiographer
replace field_det =22 if inlist(nus2000_s,"669906","669907","669912","669937","769905","769925") // bioengineering, incl. higher-degree RAB subjects
replace field_det =23 if inlist(substr(nus2000_s,1,4),"6651","7651") // occupational therapy
// Was "6652","7651": 7651 repeated the occupational-therapy prefix of the line above.
// NOTE(review): 7652 assumed to be the intended higher-degree physiotherapy prefix - confirm.
// All "76" codes are reassigned to 46 further down, so field_agg is unaffected either way.
replace field_det =24 if inlist(substr(nus2000_s,1,4),"6652","7652") // physiotherapy
replace field_det =25 if inlist(substr(nus2000_s,1,4),"6611","7611") // nursing
replace field_det =26 if inlist(substr(nus2000_s,1,4),"6613","7613") // social educator (vernepleier)
replace field_det =27 if inlist(substr(nus2000_s,1,3),"661","663","664","665","666","667","669") & !inrange(field_det,21,26)
replace field_det =28 if substr(nus2000_s,1,4)=="6441"
replace field_det =29 if substr(nus2000_s,1,2)=="67"
replace field_det =31 if inlist(substr(nus2000_s,1,3),"668","768") // sports

replace field_det =41 if inlist(uhgruppe,"55","55M") // graduate engineer (sivilingenior)
replace field_det =42 if inlist(uhgruppe,"35","35M") // law
replace field_det =43 if inlist(uhgruppe,"36") // medicine
replace field_det =44 if inlist(uhgruppe,"50","50M") // dentistry
replace field_det =45 if inlist(uhgruppe,"47","47M") // pharmacy
replace field_det =46 if substr(nus2000_s,1,2)=="76" & !inlist(field_det,43,44,45,46)
replace field_det =47 if substr(nus2000_s,1,4)=="7572"

// 99: code does not start with 6, 7 or 8; 90: starts with 6, 7 or 8 but matched none of the rules
replace field_det =99 if !inlist(substr(nus2000_s,1,1),"6","7","8") & nus2000_s!=""
replace field_det =90 if  inlist(substr(nus2000_s,1,1),"6","7","8") & field_det==.

// Aggregated field code: each detailed code is assigned to exactly one group below
generate byte field_agg = .
replace field_agg = 0  if field_det == 0                             // ex.phil
replace field_agg = 1  if inlist(field_det, 1, 2, 29)                // sci
replace field_agg = 2  if field_det == 3                             // engineering
replace field_agg = 3  if inlist(field_det, 4, 5, 28)                // commerce
replace field_agg = 4  if inrange(field_det, 6, 9)                   // soc.sci
replace field_agg = 5  if inrange(field_det, 10, 14) | field_det == 32 // teaching
replace field_agg = 6  if inrange(field_det, 15, 18) | field_det == 30 // humanities
replace field_agg = 8  if inrange(field_det, 19, 27) | field_det == 31 // health
replace field_agg = 42 if inlist(field_det, 41, 47)                  // civil engineering
replace field_agg = 44 if field_det == 42                            // law
replace field_agg = 48 if inrange(field_det, 43, 46)                 // health (medicine, dentistry, pharmaceutics)

replace field_agg = 90 if field_det == 90
replace field_agg = 99 if field_det == 99

// Value labels for the detailed (field_det) and aggregated (field_agg) field codes.
// The definitions are written with ';' as command delimiter instead of line continuations.
capture label drop field_det
#delimit ;
label define field_det
	0  "General intro course"
	1  "Science"
	2  "Computer Science"
	3  "Engineering"
	4  "Commerce (not business)"
	5  "Business"
	6  "Social Sciences"
	7  "Criminology"
	8  "Psychology"
	9  "Journalism"
	10 "Teachers College"
	11 "Teacher, special subjects"
	12 "Kindergarten teacher"
	13 "One-year units for teachers"
	14 "Other teaching"
	15 "Humanities"
	16 "Languages"
	19 "Child welfare"
	20 "Social work"
	21 "Audiograph"
	22 "Bio-Engineering"
	23 "Ergotherapy"
	24 "Physiotherapy"
	25 "Nurse"
	26 "Social educator (vernepl)"
	27 "Other health"
	28 "Tourism"
	29 "Agriculture"
	30 "Media"
	31 "Sports"
	32 "One-year Pedagogy"
	41 "Civil Engineering"
	42 "Law"
	43 "Medicine"
	44 "Dentistry"
	45 "Pharmaceutics"
	46 "Other graduate health"
	47 "Architecture"
	90 "Other"
	99 "No College" ;
#delimit cr

capture label drop field_agg
#delimit ;
label define field_agg
	0  "General intro course"
	1  "Science"
	2  "Engineering"
	3  "Commerce"
	4  "Social sci"
	5  "Teaching"
	6  "Humanities"
	7  "Social work"
	8  "Other Health"
	9  "Tourism"
	10 "Agriculture"
	11 "Media"
	12 "Sports"
	42 "Techonology"
	44 "Law"
	48 "Medicine"
	90 "Other"
	99 "No College" ;
#delimit cr

label values field_det field_det
label values field_agg field_agg

// Renumber the aggregated codes to 1-10 and relabel them with names that are valid variable
// names, so that the decoded label text can be used to name one indicator per field
recode *field*agg* (6=1) (4=2) (5=3) (8=4) (1=5) (2=6) (42=7) (3=8) (44=9) (48=10)
label define field_agg 0 "general_intro_course" 1 "humanities" 2 "social_science" 3 "teaching" 4 "health" 5 "science" ///
	6 "enigneering" 7 "technology" 8 "business" 9 "law" 10 "medicine" 90 "other" 99 "no_college", replace

label values *field*agg* field_agg
// Rows without a nus2000 code carry no field
replace field_agg = . if missing(nus2000)
decode field_agg, generate(field_agg_s)

// One 0/1 indicator per field (no indicator is created for "no_college")
local field_names general_intro_course humanities social_science teaching health science enigneering technology business law medicine other
foreach fld of local field_names {
	generate `fld' = (field_agg_s == "`fld'")
}

// Field of the first HE enrollment: rows with he_e == 1 sort ahead of the rows set to missing
// here, then by date, level, year, institution, municipality and course id
replace he_e = . if he_e == 0

local field_names general_intro_course humanities social_science teaching health science enigneering technology business law medicine other
local first_field_vars
foreach fld of local field_names {
	bysort npid (he_e kltrinn2000dato kltrinn2000 year hskode skolekom lopenr_kurs): generate `fld'_f_he_ = `fld'[1]
	// Students never observed in HE get no first-enrollment field
	replace `fld'_f_he_ = . if he_e_ == 0
	local first_field_vars `first_field_vars' `fld'_f_he_
}

keep npid he_e_ first_year_e_ last_year_e_ `first_field_vars'
duplicates drop

merge 1:1 npid using terminales, keep(using match) nogenerate
save terminales, replace
*****************************************
*Completion
// Builds education_demo: f_demo records dated 2018 or earlier, linked to the course register
// through npid and the course id with its first character replaced by "K"
cd $pathdata_education
use npid lopenr_kurs bu bu_kltrinn bu_regdato using f_demo_1974-2018.dta, clear

drop if missing(lopenr_kurs)
keep if real(substr(string(bu_regdato), 1, 4)) <= 2018
duplicates drop

// Levels below 6 are blanked, then filled with the lowest remaining level observed for the
// same bu code; 6 is used when the code has no such level
replace bu_kltrinn = . if bu_kltrinn < 6
bysort bu (bu_kltrinn): replace bu_kltrinn = bu_kltrinn[1] if missing(bu_kltrinn)
replace bu_kltrinn = 6 if missing(bu_kltrinn)
duplicates drop

replace lopenr_kurs = "K" + substr(lopenr_kurs, 2, 9)

// Still in the education folder at this point
merge 1:m npid lopenr_kurs using f_kurs_1974-2018.dta, keep(master match) nogenerate keepusing(hoved hskode uhgruppe skolekom)
duplicates drop

generate year = real(substr(string(bu_regdato), 1, 4))

cd $pathdata_processed
save education_demo, replace
*****************************************
*panel with our sample
// Nine rows per student (year = year_vg3 + 1, ..., year_vg3 + 9), each matched to the
// education_demo records of that calendar year
use npid year_vg3 using terminales, clear
expand 9
by npid, sort: generate year = year_vg3 + _n
keep npid year

merge 1:m npid year using education_demo, keep(master match) nogenerate
save outcomes_education_c, replace
********************************************************************************
//outcomes for our sample: completion
cd $pathdata_processed
use npid year_vg3 using terminales, clear
duplicates drop
merge 1:m npid using outcomes_education_c, keep(master match) nogenerate

//HS; HE; Grade level in HE; Last year of completion
generate hs_gs_c = inlist(bu, 401101, 441106, 401111, 401112)
generate he_c = (hoved == 3)
generate year_c = year if !missing(lopenr_kurs)

// Student-level maximum of each row-level measure
bysort npid: egen hs_gs_c_ = max(hs_gs_c)
bysort npid: egen he_c_ = max(he_c)
bysort npid: egen bu_kl_ = max(bu_kltrinn)
bysort npid: egen year_c_ = max(year_c)

// Missing levels and levels below 13 are set to 11; a missing last year is set to year_vg3 - 2
replace bu_kl_ = 11 if missing(bu_kl_) | bu_kl_ < 13
replace year_c_ = year_vg3 - 2 if missing(year_c_)

keep npid hs_gs_c_ he_c_ bu_kl_ year_c_
duplicates drop

merge 1:1 npid using terminales, keep(using match) nogenerate
save terminales, replace
********************************************************************************
//Selectivity of the first enrollment+share of available programs
*Peers
cd $pathdata_processed
use education_kurs, clear
keep if hoved == 3 & year >= 2006
save sample_peers, replace //all students in HE

// temp: one row per student and HE year, numbered n = 1, 2, ... within student
keep npid year
duplicates drop

rename year year_univ
bysort npid (year_univ): generate n = _n
save temp, replace

// Admission score of every HE student as of each of their HE years, built in passes:
// pass i handles each student's i-th HE year so that the 1:m merge key (npid) is unique.
use temp, clear
sum n
local j `r(max)'
// Becomes 1 once peers_scores has been written in this run, so the first non-empty pass
// creates the file and later passes append to it (previously tied to pass 1 being non-empty)
local have_scores 0
foreach i of numlist 1/`j' { //their GPA
	use temp, clear
	keep if n == `i'

	cd $pathdata_education
	merge 1:m npid using tab_karakter_vgskole_2003-2018.dta, keep(1 3) nogen keepusing(skolear fagkode stp skr mun) 
	// Return to the processed folder right away: this used to happen only inside the
	// non-empty branch, so an empty pass left the next `use temp` in the wrong folder
	cd $pathdata_processed

	gen year = real(substr(skolear, 1, 4))
	// Characters 5-8 of skolear are compared with the HE year; unmatched rows drop out here
	keep if real(substr(skolear, 5, 4)) <= year_univ
	
	count
	if `r(N)'>0 {
		foreach var in stp skr mun {
			replace `var' = "1" if inlist(`var', "0", "9", "IV", "IM", "ID", "IB", "IG", "X", "Z")
		}
		
		foreach var in stp skr mun {
			gen `var'_real = real(`var')
			replace `var'_real = . if !inlist(`var'_real, 1, 2, 3, 4, 5, 6)
		}

		gsort npid fagkode -year -skr_real -mun_real -stp_real	
		bysort npid fagkode: keep if _n == 1

		foreach var in stp skr mun {	
			bysort npid: egen sum_`var' = sum(`var'_real)
			bysort npid: egen n_`var' = count(`var'_real)
		}
	
		order stp_real skr_real mun_real
	
		gen admission_score = ((sum_stp+sum_skr+sum_mun)/(n_stp+n_skr+n_mun))*10
		
		keep npid year_univ admission_score 
		duplicates drop

		if `have_scores' == 0 {
			save peers_scores, replace
			local have_scores 1
		}
		else {
			append using peers_scores
			save peers_scores, replace
		}
	}
}
cd $pathdata_processed
use peers_scores, clear

// Attach each student-year score to that student's HE records of the same year
rename year_univ year
merge 1:m npid year using sample_peers, keep(using match) nogenerate

*min peer gpa
// Lowest admission score among students sharing year, institution, level and program code
by year hskode kltrinn2000 nus2000, sort: egen min_peernus2000 = min(admission_score)
save sample_peers, replace
********************************************************************************
*Selectivity of the first enrollment
// temp: one row per program cell (year, hskode, kltrinn2000, nus2000) with its minimum peer score
keep year hskode kltrinn2000 nus2000 min_peernus2000
duplicates drop
save temp, replace

cd $pathdata_processed
use outcomes_education_e, clear
keep if hoved == 3

// First HE record of each sample student
bysort npid (kltrinn2000dato kltrinn2000 year hskode skolekom lopenr_kurs): keep if _n == 1
keep npid year hskode kltrinn2000 nus2000

merge m:1 hskode year kltrinn2000 nus2000 using temp, keep(master match) nogenerate
keep hskode year kltrinn2000 nus2000 min_peernus2000
duplicates drop

// Within year, rank programs by minimum peer score (track ranks) and rescale the rank to 0-100
bysort year: egen rank = rank(min_peernus2000), track
bysort year: egen rank_top = max(rank)
replace rank = (rank - 1)/(rank_top - 1) * 100

drop rank_top
save selectivity, replace

// rank_f_he: selectivity rank of the program of each student's first HE enrollment
cd $pathdata_processed
use outcomes_education_e, clear
keep if hoved == 3

bysort npid (kltrinn2000dato kltrinn2000 year hskode skolekom lopenr_kurs): keep if _n == 1
keep npid hskode year kltrinn2000 nus2000

merge m:1 hskode year kltrinn2000 nus2000 using selectivity, keep(master match) nogenerate
rename rank rank_f_he
keep npid rank_f_he

merge 1:1 npid using terminales, keep(using match) nogenerate
save terminales, replace
********************************************************************************
//Nb or share of available programs
// selectivity2: one row per year, with one min_peernus2000<j> column per program of that year
use selectivity, clear
keep year min_peernus2000
bysort year (min_peernus2000): generate j = _n
reshape wide min_peernus2000, i(year) j(j)
save selectivity2, replace

cd $pathdata_processed
use outcomes_education_e, clear
keep if hoved == 3

// First HE record of each sample student, with the admission score held at that time
bysort npid (kltrinn2000dato kltrinn2000 year hskode skolekom lopenr_kurs): keep if _n == 1
keep npid hskode year kltrinn2000 nus2000

merge 1:m npid hskode year kltrinn2000 nus2000 using sample_peers, keep(master match) keepusing(admission_score) nogenerate
duplicates drop
keep npid year admission_score

merge m:1 year using selectivity2, nogenerate

// nb_prog counts the programs of the year with a known threshold; nb_available_prog counts
// those whose threshold does not exceed the student's own score
generate nb_prog = 0
generate nb_available_prog = 0
foreach cutoff of varlist min_peernus2000* {
	replace nb_prog = nb_prog + !missing(`cutoff')
	replace nb_available_prog = nb_available_prog + (admission_score >= `cutoff' & !missing(`cutoff') & !missing(admission_score))
}

replace nb_available_prog = . if missing(admission_score)
generate share_available_prog = nb_available_prog/nb_prog

keep npid nb_available_prog share_available_prog
merge 1:1 npid using terminales, keep(using match) nogenerate
save terminales, replace
*****************************************
//Employment
// Builds employment: yearly labour-market status for persons aged 16-74
use npid aar pers_alder arb_arbmark_status using ${pathdata_labor}/regsys_2000-2019.dta, clear

keep if !missing(npid) & inrange(pers_alder, 16, 74)

rename aar year

*employment status
rename arb_arbmark_status yrkstat
destring yrkstat, replace force
// Missing (including non-numeric) statuses and status 4 are recoded to 0
replace yrkstat = 0 if missing(yrkstat) | yrkstat == 4

keep npid year yrkstat
duplicates drop

cd $pathdata_processed
save employment, replace
*****************************************
*panel with our sample
// Nine rows per student (year = year_vg3 + 1, ..., year_vg3 + 9), limited to years up to 2019
use npid year_vg3 using terminales, clear
expand 9
by npid, sort: generate year = year_vg3 + _n
keep if year <= 2019
keep npid year

merge 1:m npid year using employment, keep(master match) nogenerate
//only duplicates are for yrk == 1 | 2, which we treat similarly
bysort npid year (yrkstat): keep if _n == 1
save outcomes_employment, replace
********************************************************************************
//Earnings
// Builds earnings: one wyrkinnt value per person and year, up to 2018
use npid wyrkinnt aargang using ${pathdata_income}/inntekt_1993-2018.dta, clear
rename aargang year
drop if year > 2018

// With several records for the same person-year, the first one after sorting on descending
// wyrkinnt is kept
gsort npid year -wyrkinnt
bysort npid year: keep if _n == 1

cd $pathdata_processed
save earnings, replace
*****************************************
*panel with our sample
// Nine rows per student (year = year_vg3 + 1, ..., year_vg3 + 9) with that year's earnings
use npid year_vg3 using terminales, clear
expand 9
by npid, sort: generate year = year_vg3 + _n
keep npid year

merge 1:1 npid year using earnings, keep(master match) nogenerate
save outcomes_earnings, replace
********************************************************************************
//Inflation
// wyrkinnt_c = wyrkinnt rescaled by the ratio of the 2007 value of yavg to the year's own yavg
use outcomes_earnings, clear
cd $pathdata_public_access
merge m:1 year using inflation, keep(master match) nogenerate

// Reference value: mean of yavg over the rows with year == 2007
summarize yavg if year == 2007
local pci_2007 = `r(mean)'

generate wyrkinnt_c = wyrkinnt*(`pci_2007'/yavg)
drop yavg

cd $pathdata_processed
save outcomes_earnings, replace
********************************************************************************	
//Outcome for our sample: year first job + employment 8 years after the exams + earnings 1st job & 9 years later
use npid year_c_ year_vg3 using terminales, clear

merge 1:m npid using outcomes_employment, keep(master match) nogenerate
merge 1:1 npid year using outcomes_earnings, keep(master match) nogenerate

// A year counts as "in a job" when yrkstat is 1 or 2, the year is after the last completion
// year (year_c_) and wyrkinnt is positive
generate first_job = inlist(yrkstat, 1, 2) & year > year_c_ & wyrkinnt > 0 & !missing(wyrkinnt)

// Earliest such year per student
generate job_year_tmp = year if first_job == 1
bysort npid: egen year_1st_job = min(job_year_tmp)
drop job_year_tmp

// Keep the flag only on the row of that first year, then lift it to the student level
replace first_job = 0 if year != year_1st_job
bysort npid: egen first_job_ = max(first_job)

generate still_educ = year_c_ >= year_vg3 + 9

generate no_first_job = still_educ == 0 & first_job_ == 0

// Earnings (raw and deflated) in the year of the first job, copied to every row of the student
foreach earn in wyrkinnt wyrkinnt_c {
	generate `earn'1 = `earn' if year == year_1st_job
	bysort npid: egen `earn'1_ = max(`earn'1)
	drop `earn'1
}

// Keep the ninth panel row of each student
bysort npid (year): keep if _n == 9

generate job10 = inlist(yrkstat, 1, 2) & year > year_c_ & wyrkinnt > 0 & !missing(wyrkinnt)

// Earnings on that row are only kept when the student is in a job that year
foreach earn in wyrkinnt wyrkinnt_c {
	rename `earn' `earn'10_
	replace `earn'10_ = . if job10 != 1
}

keeporder npid first_job_ still_educ no_first_job year_1st_job wyrkinnt1_ wyrkinnt_c1_ job10 wyrkinnt10_ wyrkinnt_c10_

cd $pathdata_processed
merge 1:1 npid using terminales, keep(using match) nogenerate

save terminales, replace
********************************************************************************
//Additional controls and baseline characteristics
*mother info
// mothers: one row per mother id and child year_vg3, keyed on npid from here on
cd $pathdata_processed
use nmpid year_vg3 if !missing(nmpid) using terminales, clear
duplicates drop

rename nmpid npid

save mothers, replace

*yob
// Year of birth is looked up once per mother and then spread over her rows
keep npid
duplicates drop

cd $pathdata_population
merge 1:m npid using slekt_1934-2019.dta, keep(master match) nogenerate keepusing(bmonth)

generate yob = real(substr(string(bmonth), 1, 4))
drop bmonth

cd $pathdata_processed
merge 1:m npid using mothers, nogenerate
save mothers, replace

*education
// bu_kl_: highest bu_kltrinn among the mother's education_demo records dated before year_vg3.
// A mother can appear with several year_vg3 values; pass i handles her i-th value so that the
// 1:m merge key (npid) is unique.
keep npid year_vg3
bysort npid (year_vg3): generate n = _n
save temp, replace

summarize n
local j `r(max)'
forvalues i = 1/`j' {
	use temp, clear
	keep if n == `i'

	merge 1:m npid using education_demo, keep(master match) nogenerate
	keep if year < year_vg3

	bysort npid: egen bu_kl_ = max(bu_kltrinn)

	keep npid year_vg3 bu_kl_
	duplicates drop

	// The first pass creates mothers_e, later passes extend it
	if `i' == 1 {
		save mothers_e, replace
	}
	else {
		append using mothers_e
		save mothers_e, replace
	}
}

merge 1:1 npid year_vg3 using mothers, nogenerate
save mothers, replace

*income
// wyrkinnt_c: mean deflated earnings over the five years year_vg3 - 5, ..., year_vg3 - 1, with
// years without earnings counted as 0. Same pass structure as for education.
keep npid year_vg3
bysort npid (year_vg3): generate n = _n
save temp, replace

summarize n
local j `r(max)'
forvalues i = 1/`j' {
	use temp, clear
	keep if n == `i'

	expand 5
	bysort npid: generate aargang = year_vg3 - 6 + _n

	merge 1:m npid aargang using ${pathdata_income}/inntekt_1993-2018.dta, keep(master match) nogenerate keepusing(wyrkinnt aargang)

	// NOTE(review): this keeps the lowest wyrkinnt of a person-year, while the earnings file
	// built for the students keeps the highest - confirm that the difference is intended
	bysort npid aargang (wyrkinnt): keep if _n == 1
	rename aargang year

	cd $pathdata_public_access
	merge m:1 year using inflation, keep(master match) nogenerate

	// Reference value of yavg for 2007, read from the inflation file itself
	preserve
	use inflation, clear
	summarize yavg if year == 2007
	local pci_2007 = `r(mean)'
	restore

	generate wyrkinnt_c = wyrkinnt*(`pci_2007'/yavg)
	drop yavg

	replace wyrkinnt_c = 0 if missing(wyrkinnt_c)

	bysort npid year_vg3: egen Mwyrkinnt_c = mean(wyrkinnt_c)

	keep npid year_vg3 Mwyrkinnt_c
	rename Mwyrkinnt_c wyrkinnt_c

	duplicates drop

	cd $pathdata_processed
	// The first pass creates mothers_e, later passes extend it
	if `i' == 1 {
		save mothers_e, replace
	}
	else {
		append using mothers_e
		save mothers_e, replace
	}
}

merge 1:1 npid year_vg3 using mothers, nogenerate

// Prefix every non-key variable with "m" and restore the mother id name
ds npid year_vg3, not
foreach v in `r(varlist)' {
	rename `v' m`v'
}

rename npid nmpid
save mothers, replace

*father info
// fathers: one row per father id and child year_vg3, keyed on npid from here on
cd $pathdata_processed
use nfpid year_vg3 if !missing(nfpid) using terminales, clear
duplicates drop

rename nfpid npid

save fathers, replace

*yob
// Year of birth is looked up once per father and then spread over his rows
keep npid
duplicates drop

cd $pathdata_population
merge 1:m npid using slekt_1934-2019.dta, keep(master match) nogenerate keepusing(bmonth)

generate yob = real(substr(string(bmonth), 1, 4))
drop bmonth

cd $pathdata_processed
merge 1:m npid using fathers, nogenerate
save fathers, replace

*education
// bu_kl_: highest bu_kltrinn among the father's education_demo records dated before year_vg3.
// A father can appear with several year_vg3 values; pass i handles his i-th value so that the
// 1:m merge key (npid) is unique.
keep npid year_vg3
bysort npid (year_vg3): generate n = _n
save temp, replace

summarize n
local j `r(max)'
forvalues i = 1/`j' {
	use temp, clear
	keep if n == `i'

	merge 1:m npid using education_demo, keep(master match) nogenerate
	keep if year < year_vg3

	bysort npid: egen bu_kl_ = max(bu_kltrinn)

	keep npid year_vg3 bu_kl_
	duplicates drop

	// The first pass creates fathers_e, later passes extend it
	if `i' == 1 {
		save fathers_e, replace
	}
	else {
		append using fathers_e
		save fathers_e, replace
	}
}

merge 1:1 npid year_vg3 using fathers, nogenerate
save fathers, replace

*income
// wyrkinnt_c: mean deflated earnings over the five years year_vg3 - 5, ..., year_vg3 - 1, with
// years without earnings counted as 0. Same pass structure as for education.
keep npid year_vg3
bysort npid (year_vg3): generate n = _n
save temp, replace

summarize n
local j `r(max)'
forvalues i = 1/`j' {
	use temp, clear
	keep if n == `i'

	expand 5
	bysort npid: generate aargang = year_vg3 - 6 + _n

	merge 1:m npid aargang using ${pathdata_income}/inntekt_1993-2018.dta, keep(master match) nogenerate keepusing(wyrkinnt aargang)

	// NOTE(review): this keeps the lowest wyrkinnt of a person-year, while the earnings file
	// built for the students keeps the highest - confirm that the difference is intended
	bysort npid aargang (wyrkinnt): keep if _n == 1
	rename aargang year

	cd $pathdata_public_access
	merge m:1 year using inflation, keep(master match) nogenerate

	// Reference value of yavg for 2007, read from the inflation file itself
	preserve
	use inflation, clear
	summarize yavg if year == 2007
	local pci_2007 = `r(mean)'
	restore

	generate wyrkinnt_c = wyrkinnt*(`pci_2007'/yavg)
	drop yavg

	replace wyrkinnt_c = 0 if missing(wyrkinnt_c)

	bysort npid year_vg3: egen Mwyrkinnt_c = mean(wyrkinnt_c)

	keep npid year_vg3 Mwyrkinnt_c
	rename Mwyrkinnt_c wyrkinnt_c

	duplicates drop

	cd $pathdata_processed
	// The first pass creates fathers_e, later passes extend it
	if `i' == 1 {
		save fathers_e, replace
	}
	else {
		append using fathers_e
		save fathers_e, replace
	}
}

merge 1:1 npid year_vg3 using fathers, nogenerate

// Prefix every non-key variable with "f" and restore the father id name
ds npid year_vg3, not
foreach v in `r(varlist)' {
	rename `v' f`v'
}

rename npid nfpid
save fathers, replace

// Attach the mother and father characteristics to each student, matching on the parent id
// and the student's year_vg3
use terminales, clear
merge m:1 nmpid year_vg3 using mothers, keep(master match) nogenerate
merge m:1 nfpid year_vg3 using fathers, keep(master match) nogenerate

save terminales, replace
********************************************************************************	
// Remove the trailing underscore from every variable whose name ends in "_"
ds *_
foreach v in `r(varlist)' {
	local stem = substr("`v'", 1, strlen("`v'") - 1)
	rename `v' `stem'
}
save terminales, replace

// Delete the intermediate datasets. erase stops with an error if a file does not exist.
erase sample_tirage.dta

// Yearly temporary files, present in both processed folders
foreach dir in pathdata_processed pathdata_processed_cc {
	cd $`dir'
	forvalues y = 2005/2009 {
		foreach stub in tirage_temp`y' comb`y'_temp comb`y' comb2`y' comb3`y' {
			erase `stub'.dta
		}
	}
}

cd $pathdata_processed_cc
erase tirage_temp.dta

// Intermediate files written to the processed folder
cd $pathdata_processed
foreach stub in matieres education_kurs outcomes_education_e education_demo outcomes_education_c ///
	sample_peers temp peers_scores selectivity selectivity2 employment outcomes_employment ///
	earnings outcomes_earnings mothers mothers_e fathers fathers_e {
	erase `stub'.dta
}